# Service definitions for the document_based_qa_transformers distribution
# (name taken from the agent's pipeline config path below).
services:
  # Agent container: the command runs bin/wait first and only then starts
  # deeppavlov_agent with this distribution's pipeline config.
  agent:
    environment:
      # host:port pairs, comma-separated. The folded scalar joins the lines
      # with single spaces, so the value is one line at runtime.
      WAIT_HOSTS: >-
        sentseg:8011,
        combined-classification:8087,
        ranking-based-response-selector:8002,
        sentence-ranker:8128,
        transformers-lm-mistral-7b-128k:8185,
        doc-retriever:8165,
        dff-document-qa-transformers-llm-skill:8186
      # Taken from WAIT_TIMEOUT when provided, otherwise 1000.
      WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000}
      HIGH_PRIORITY_INTENTS: 1
      RESTRICTION_FOR_SENSITIVE_CASE: 1
      ALWAYS_TURN_ON_ALL_SKILLS: 0
      LANGUAGE: EN
    # Folded onto two lines for readability; the result is a single command.
    command: >-
      sh -c 'bin/wait && python -m deeppavlov_agent.run
      agent.pipeline_config=assistant_dists/document_based_qa_transformers/pipeline_conf.json'

  # Runs a prebuilt image; nothing is built locally for this service.
  # NOTE(review): the image reference has no tag, so the default tag is
  # pulled — consider pinning a specific tag or digest for reproducibility.
  files:
    image: julienmeerschart/simple-file-upload-download-server

  # SentSeg annotator: Flask app served on port 8011 (the port listed for
  # this service in the agent's WAIT_HOSTS).
  sentseg:
    env_file:
      - .env
    build:
      # Uses the test Dockerfile located inside the build context.
      dockerfile: Dockerfile-test
      context: ./annotators/SentSeg/
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8011
    deploy:
      resources:
        # Reservation and limit are set to the same amount.
        reservations:
          memory: 1.5G
        limits:
          memory: 1.5G

  # Document retriever annotator: Flask app served on port 8165, built from
  # the repository root.
  doc-retriever:
    env_file:
      - .env
    build:
      dockerfile: ./annotators/doc_retriever/Dockerfile
      context: .
      args:
        SERVICE_NAME: doc_retriever
        SERVICE_PORT: 8165
        CONFIG_PATH: ./doc_retriever_config.json
        # Comma-separated document URLs with no spaces. Kept on one line
        # because folding the scalar would insert spaces into the value.
        DOC_PATH_OR_LINK: http://files.deeppavlov.ai/dream_data/documents_for_qa/test_file_dream_repo.html,http://files.deeppavlov.ai/dream_data/documents_for_qa/alphabet_financial_report.txt,http://files.deeppavlov.ai/dream_data/documents_for_qa/test_file_jurafsky_chatbots.pdf
        PARAGRAPHS_NUM: 5
        FILE_SERVER_TIMEOUT: 30
    environment:
      - FLASK_APP=server
      - CUDA_VISIBLE_DEVICES=0
    command: python -m flask run -h 0.0.0.0 -p 8165
    deploy:
      resources:
        # Reservation and limit are set to the same amount.
        reservations:
          memory: 5G
        limits:
          memory: 5G

  # Combined classifier annotator: one gunicorn worker bound to port 8087,
  # started with --timeout 600.
  combined-classification:
    env_file:
      - .env
    build:
      context: .
      dockerfile: ./annotators/combined_classification/Dockerfile
      args:
        SERVICE_PORT: 8087
        CONFIG: combined_classifier.json
    environment:
      - CUDA_VISIBLE_DEVICES=0
    command: gunicorn --workers=1 server:app -b 0.0.0.0:8087 --timeout 600
    deploy:
      resources:
        # Reservation and limit are set to the same amount.
        reservations:
          memory: 2G
        limits:
          memory: 2G

  # Response selector: Flask app served on port 8002. Its build args point
  # it at the sentence-ranker service (port 8128, /respond).
  ranking-based-response-selector:
    env_file:
      - .env
    build:
      context: .
      dockerfile: ./response_selectors/ranking_based_response_selector/Dockerfile
      args:
        SERVICE_NAME: response_selector
        SERVICE_PORT: 8002
        LANGUAGE: EN
        # Sentence-ranker connection settings.
        SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker
        SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond
        SENTENCE_RANKER_TIMEOUT: 3
        N_UTTERANCES_CONTEXT: 5
        FILTER_TOXIC_OR_BADLISTED: 1
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8002
    deploy:
      resources:
        # Reservation and limit are set to the same amount.
        reservations:
          memory: 100M
        limits:
          memory: 100M

  # Sentence ranker: Flask app served on port 8128, the port used by the
  # response selector's SENTENCE_RANKER_SERVICE_URL.
  sentence-ranker:
    env_file:
      - .env
    build:
      # No dockerfile key is given here, only the context directory.
      context: ./services/sentence_ranker/
      args:
        SERVICE_NAME: sentence_ranker
        SERVICE_PORT: 8128
        PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/all-MiniLM-L6-v2
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8128
    deploy:
      resources:
        # Reservation and limit are set to the same amount.
        reservations:
          memory: 3G
        limits:
          memory: 3G

  # Generative LM service: Flask app served on port 8185, built from the
  # shared transformers_lm Dockerfile with the Yarn-Mistral-7b-128k model.
  transformers-lm-mistral-7b-128k:
    env_file:
      - .env
    build:
      context: .
      dockerfile: ./services/transformers_lm/Dockerfile
      args:
        SERVICE_NAME: transformers_lm_mistral_7b_128k
        SERVICE_PORT: 8185
        PRETRAINED_MODEL_NAME_OR_PATH: NousResearch/Yarn-Mistral-7b-128k
        HALF_PRECISION: 1
        USE_FLASH_ATTENTION_2: 1
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8185
    deploy:
      resources:
        # Reservation and limit are set to the same amount; this is the
        # largest memory allocation in the file.
        reservations:
          memory: 50G
        limits:
          memory: 50G

  # Document-QA skill on port 8186. Its generative backend URL points at the
  # transformers-lm-mistral-7b-128k service (port 8185, /respond).
  # No command or environment is set for this service in this file.
  dff-document-qa-transformers-llm-skill:
    env_file:
      - .env
    build:
      context: .
      dockerfile: ./skills/dff_document_qa_llm_skill/Dockerfile
      args:
        SERVICE_NAME: dff_document_qa_llm_skill
        SERVICE_PORT: 8186
        # Generative backend settings.
        GENERATIVE_SERVICE_URL: http://transformers-lm-mistral-7b-128k:8185/respond
        GENERATIVE_SERVICE_CONFIG: transformers_mistral.json
        GENERATIVE_TIMEOUT: 120
        N_UTTERANCES_CONTEXT: 7
        FILE_SERVER_TIMEOUT: 30
        DOCUMENT_PROMPT_FILE: common/prompts/document_qa_instruction.json
    deploy:
      resources:
        # Reservation and limit are set to the same amount.
        reservations:
          memory: 128M
        limits:
          memory: 128M

# Compose file format version, quoted so it stays a string.
# NOTE(review): the current Compose Specification treats the top-level
# `version` key as obsolete — confirm the tooling in use before removing it.
version: '3.7'
